{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f70abed7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import re\n",
    "import pickle\n",
    "from collections import Counter\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9d3e148a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 给5加上_paper\n",
    "# df5 = pd.read_excel(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\原始数据\\5科学主题-科学主体边数据新.xlsx')\n",
    "# df5['source'] = df5['source'].apply(lambda x: x+'_paper')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5470b0b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# df5.to_excel(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\原始数据\\5科学主题-科学主体边数据新.xlsx',index = False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "44f9d6d7",
   "metadata": {},
   "source": [
    "#### 1.归一化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0a14cb23",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory that holds the raw edge-list workbooks.\n",
    "dir_path = Path(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\原始数据')\n",
    "\n",
    "# Recursively collect every .xlsx workbook below dir_path.\n",
    "# - sorted(): rglob does not promise an order, so sort for reproducible runs.\n",
    "# - names starting with '~$' are skipped: Excel creates such lock files next to an\n",
    "#   open workbook; they end in .xlsx but pd.read_excel cannot read them.\n",
    "path_ls = sorted(\n",
    "    path\n",
    "    for path in dir_path.rglob('*')\n",
    "    if path.is_file() and path.suffix == '.xlsx' and not path.name.startswith('~$')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0e77b560",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/原始数据/1引用网络边数据.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/原始数据/2应用网络边数据.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/原始数据/3合作网络边数据新.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/原始数据/4转移网络边数据.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/原始数据/5科学主题-科学主体边数据新.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/原始数据/6技术主题-技术主体边数据新.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/原始数据/7产业主题-产业主体边数据.xlsx')]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path_ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "907944e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalize_column_to_range(df, column_name, new_min, new_max):\n",
    "    \"\"\"\n",
    "    Min-max rescale one column of a DataFrame into [new_min, new_max].\n",
    "\n",
    "    The input frame is not modified; a rescaled copy is returned.\n",
    "\n",
    "    Parameters:\n",
    "        df (DataFrame): Input pandas DataFrame.\n",
    "        column_name (str): Name of the numeric column to rescale.\n",
    "        new_min (float): Lower bound of the target range.\n",
    "        new_max (float): Upper bound of the target range.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame: Copy of ``df`` in which ``column_name`` is mapped linearly so\n",
    "        that the old minimum becomes ``new_min`` and the old maximum becomes\n",
    "        ``new_max``. If all non-missing values are identical they are set to\n",
    "        ``new_min``. Missing values stay missing in both cases.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: If ``column_name`` is not a column of ``df``.\n",
    "    \"\"\"\n",
    "    data = df.copy()\n",
    "    values = data[column_name]\n",
    "    old_min = values.min()  # min()/max() ignore NaN by default\n",
    "    old_max = values.max()\n",
    "\n",
    "    if old_max == old_min:\n",
    "        # Constant column: the min-max formula would divide by zero, so map every\n",
    "        # present value to new_min. mask() only replaces non-missing entries, so\n",
    "        # NaN stays NaN, consistent with the general branch below.\n",
    "        data[column_name] = values.mask(values.notna(), new_min)\n",
    "    else:\n",
    "        # Standard min-max formula, shifted and stretched to the target range.\n",
    "        data[column_name] = new_min + (values - old_min) / (old_max - old_min) * (new_max - new_min)\n",
    "\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "35a18946",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rescale the 'weight' column of every workbook in path_ls to [1, 10] and write the\n",
    "# result to the same relative location with '原始数据' swapped for '处理\\1归一化'.\n",
    "for file in path_ls:\n",
    "    df = pd.read_excel(file)\n",
    "    new_df = normalize_column_to_range(df, 'weight', 1, 10)\n",
    "    save_path = Path(str(file).replace('原始数据', r'处理\\1归一化'))\n",
    "    # to_excel does not create missing folders, so make sure the target exists.\n",
    "    save_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "    new_df.to_excel(save_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d590516",
   "metadata": {},
   "source": [
    "#### 2.加权"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2f0cb648",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Networks 1 and 2: load the normalized edge lists; their weights are multiplied by 1.2 in the next cell.\n",
    "df1 = pd.read_excel(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\1归一化\\1引用网络边数据.xlsx')\n",
    "df2 = pd.read_excel(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\1归一化\\2应用网络边数据.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "21242123",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Up-weight networks 1 and 2 by a factor of 1.2 (vectorized column arithmetic).\n",
    "# NOTE: df1/df2 are overwritten in place, so running this cell twice applies the\n",
    "# factor twice; reload them from the workbooks before re-running.\n",
    "df1['weight'] = df1['weight'] * 1.2\n",
    "df2['weight'] = df2['weight'] * 1.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "7083dc26",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>source</th>\n",
       "      <th>target</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>12.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>S8寄生虫感染与免疫调控机制研究</td>\n",
       "      <td>T8疟疾疫苗抗原设计与开发</td>\n",
       "      <td>4.257620</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>S7自身免疫疾病发病机理探究</td>\n",
       "      <td>T1癌症免疫疗法肽类技术创新</td>\n",
       "      <td>2.726014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>S4癌症免疫疗法原理与应用研究</td>\n",
       "      <td>T1癌症免疫疗法肽类技术创新</td>\n",
       "      <td>2.682281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>2.556536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T5登革热与寨卡病毒疫苗研发</td>\n",
       "      <td>2.110639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T2纳米疫苗递送系统研发</td>\n",
       "      <td>1.931589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>S6药物与基因递送系统机理解析</td>\n",
       "      <td>T2纳米疫苗递送系统研发</td>\n",
       "      <td>1.793760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>S2疱疹病毒免疫逃逸机制研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.693098</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>S1肝炎病毒复制与感染机制研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.655639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T7Toll样受体激动剂疫苗佐剂研发</td>\n",
       "      <td>1.628188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T3疫苗存储与注射技术创新</td>\n",
       "      <td>1.594659</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T4多糖蛋白结合疫苗技术</td>\n",
       "      <td>1.519422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>S4癌症免疫疗法原理与应用研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.455328</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>S2疱疹病毒免疫逃逸机制研究</td>\n",
       "      <td>T5登革热与寨卡病毒疫苗研发</td>\n",
       "      <td>1.413929</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>S6药物与基因递送系统机理解析</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.398434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>S8寄生虫感染与免疫调控机制研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.378592</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>S4癌症免疫疗法原理与应用研究</td>\n",
       "      <td>T19植物糖基转移酶工程技术</td>\n",
       "      <td>1.368341</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T2纳米疫苗递送系统研发</td>\n",
       "      <td>1.362710</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>S2疱疹病毒免疫逃逸机制研究</td>\n",
       "      <td>T6中药复方制剂技术创新</td>\n",
       "      <td>1.339368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>S6药物与基因递送系统机理解析</td>\n",
       "      <td>T1癌症免疫疗法肽类技术创新</td>\n",
       "      <td>1.320378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T1癌症免疫疗法肽类技术创新</td>\n",
       "      <td>1.307305</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>S3感染与免疫抑制原理探讨</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.299530</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>S7自身免疫疾病发病机理探究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.284125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>S6药物与基因递送系统机理解析</td>\n",
       "      <td>T9神经退行性疾病疫苗研发</td>\n",
       "      <td>1.272905</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>S16基因编辑原理与应用探索</td>\n",
       "      <td>T2纳米疫苗递送系统研发</td>\n",
       "      <td>1.266785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T16基孔肯雅病毒疫苗研发</td>\n",
       "      <td>1.246433</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>S2疱疹病毒免疫逃逸机制研究</td>\n",
       "      <td>T9神经退行性疾病疫苗研发</td>\n",
       "      <td>1.245687</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T15人工核酸分子设计技术</td>\n",
       "      <td>1.244643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T13多肽免疫增强剂研发</td>\n",
       "      <td>1.244643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>S8寄生虫感染与免疫调控机制研究</td>\n",
       "      <td>T6中药复方制剂技术创新</td>\n",
       "      <td>1.241330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>S1肝炎病毒复制与感染机制研究</td>\n",
       "      <td>T7Toll样受体激动剂疫苗佐剂研发</td>\n",
       "      <td>1.233977</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T1癌症免疫疗法肽类技术创新</td>\n",
       "      <td>1.230552</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>S7自身免疫疾病发病机理探究</td>\n",
       "      <td>T2纳米疫苗递送系统研发</td>\n",
       "      <td>1.221920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>S14利什曼病免疫病理机制解析</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.218905</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>S2疱疹病毒免疫逃逸机制研究</td>\n",
       "      <td>T2纳米疫苗递送系统研发</td>\n",
       "      <td>1.217428</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T10布鲁氏菌疫苗菌株鉴定技术</td>\n",
       "      <td>1.214709</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T6中药复方制剂技术创新</td>\n",
       "      <td>1.212946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>S13妊娠分娩与早产免疫机制研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.212219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>S15病毒性心肌炎发病机理与治疗研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.212219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T7Toll样受体激动剂疫苗佐剂研发</td>\n",
       "      <td>1.210743</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>S3感染与免疫抑制原理探讨</td>\n",
       "      <td>T1癌症免疫疗法肽类技术创新</td>\n",
       "      <td>1.207113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>S1肝炎病毒复制与感染机制研究</td>\n",
       "      <td>T1癌症免疫疗法肽类技术创新</td>\n",
       "      <td>1.206053</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>S4癌症免疫疗法原理与应用研究</td>\n",
       "      <td>T7Toll样受体激动剂疫苗佐剂研发</td>\n",
       "      <td>1.205909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>S20幽门螺杆菌感染与胃部疾病机理研究</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.205533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T3疫苗存储与注射技术创新</td>\n",
       "      <td>1.203945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>S5结核病疫苗研发与免疫响应研究</td>\n",
       "      <td>T6中药复方制剂技术创新</td>\n",
       "      <td>1.203935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>S16基因编辑原理与应用探索</td>\n",
       "      <td>T0疫苗制备与组分创新技术</td>\n",
       "      <td>1.202190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>S0免疫响应与病毒感染机理研究</td>\n",
       "      <td>T9神经退行性疾病疫苗研发</td>\n",
       "      <td>1.201491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>S1肝炎病毒复制与感染机制研究</td>\n",
       "      <td>T5登革热与寨卡病毒疫苗研发</td>\n",
       "      <td>1.200281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>S4癌症免疫疗法原理与应用研究</td>\n",
       "      <td>T2纳米疫苗递送系统研发</td>\n",
       "      <td>1.200248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>S2疱疹病毒免疫逃逸机制研究</td>\n",
       "      <td>T1癌症免疫疗法肽类技术创新</td>\n",
       "      <td>1.200048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>S4癌症免疫疗法原理与应用研究</td>\n",
       "      <td>T5登革热与寨卡病毒疫苗研发</td>\n",
       "      <td>1.200000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 source              target     weight\n",
       "0       S0免疫响应与病毒感染机理研究       T0疫苗制备与组分创新技术  12.000000\n",
       "1      S8寄生虫感染与免疫调控机制研究       T8疟疾疫苗抗原设计与开发   4.257620\n",
       "2        S7自身免疫疾病发病机理探究      T1癌症免疫疗法肽类技术创新   2.726014\n",
       "3       S4癌症免疫疗法原理与应用研究      T1癌症免疫疗法肽类技术创新   2.682281\n",
       "4      S5结核病疫苗研发与免疫响应研究       T0疫苗制备与组分创新技术   2.556536\n",
       "5       S0免疫响应与病毒感染机理研究      T5登革热与寨卡病毒疫苗研发   2.110639\n",
       "6      S5结核病疫苗研发与免疫响应研究        T2纳米疫苗递送系统研发   1.931589\n",
       "7       S6药物与基因递送系统机理解析        T2纳米疫苗递送系统研发   1.793760\n",
       "8        S2疱疹病毒免疫逃逸机制研究       T0疫苗制备与组分创新技术   1.693098\n",
       "9       S1肝炎病毒复制与感染机制研究       T0疫苗制备与组分创新技术   1.655639\n",
       "10     S5结核病疫苗研发与免疫响应研究  T7Toll样受体激动剂疫苗佐剂研发   1.628188\n",
       "11     S5结核病疫苗研发与免疫响应研究       T3疫苗存储与注射技术创新   1.594659\n",
       "12     S5结核病疫苗研发与免疫响应研究        T4多糖蛋白结合疫苗技术   1.519422\n",
       "13      S4癌症免疫疗法原理与应用研究       T0疫苗制备与组分创新技术   1.455328\n",
       "14       S2疱疹病毒免疫逃逸机制研究      T5登革热与寨卡病毒疫苗研发   1.413929\n",
       "15      S6药物与基因递送系统机理解析       T0疫苗制备与组分创新技术   1.398434\n",
       "16     S8寄生虫感染与免疫调控机制研究       T0疫苗制备与组分创新技术   1.378592\n",
       "17      S4癌症免疫疗法原理与应用研究      T19植物糖基转移酶工程技术   1.368341\n",
       "18      S0免疫响应与病毒感染机理研究        T2纳米疫苗递送系统研发   1.362710\n",
       "19       S2疱疹病毒免疫逃逸机制研究        T6中药复方制剂技术创新   1.339368\n",
       "20      S6药物与基因递送系统机理解析      T1癌症免疫疗法肽类技术创新   1.320378\n",
       "21      S0免疫响应与病毒感染机理研究      T1癌症免疫疗法肽类技术创新   1.307305\n",
       "22        S3感染与免疫抑制原理探讨       T0疫苗制备与组分创新技术   1.299530\n",
       "23       S7自身免疫疾病发病机理探究       T0疫苗制备与组分创新技术   1.284125\n",
       "24      S6药物与基因递送系统机理解析       T9神经退行性疾病疫苗研发   1.272905\n",
       "25       S16基因编辑原理与应用探索        T2纳米疫苗递送系统研发   1.266785\n",
       "26      S0免疫响应与病毒感染机理研究       T16基孔肯雅病毒疫苗研发   1.246433\n",
       "27       S2疱疹病毒免疫逃逸机制研究       T9神经退行性疾病疫苗研发   1.245687\n",
       "28     S5结核病疫苗研发与免疫响应研究       T15人工核酸分子设计技术   1.244643\n",
       "29     S5结核病疫苗研发与免疫响应研究        T13多肽免疫增强剂研发   1.244643\n",
       "30     S8寄生虫感染与免疫调控机制研究        T6中药复方制剂技术创新   1.241330\n",
       "31      S1肝炎病毒复制与感染机制研究  T7Toll样受体激动剂疫苗佐剂研发   1.233977\n",
       "32     S5结核病疫苗研发与免疫响应研究      T1癌症免疫疗法肽类技术创新   1.230552\n",
       "33       S7自身免疫疾病发病机理探究        T2纳米疫苗递送系统研发   1.221920\n",
       "34      S14利什曼病免疫病理机制解析       T0疫苗制备与组分创新技术   1.218905\n",
       "35       S2疱疹病毒免疫逃逸机制研究        T2纳米疫苗递送系统研发   1.217428\n",
       "36      S0免疫响应与病毒感染机理研究     T10布鲁氏菌疫苗菌株鉴定技术   1.214709\n",
       "37      S0免疫响应与病毒感染机理研究        T6中药复方制剂技术创新   1.212946\n",
       "38     S13妊娠分娩与早产免疫机制研究       T0疫苗制备与组分创新技术   1.212219\n",
       "39   S15病毒性心肌炎发病机理与治疗研究       T0疫苗制备与组分创新技术   1.212219\n",
       "40      S0免疫响应与病毒感染机理研究  T7Toll样受体激动剂疫苗佐剂研发   1.210743\n",
       "41        S3感染与免疫抑制原理探讨      T1癌症免疫疗法肽类技术创新   1.207113\n",
       "42      S1肝炎病毒复制与感染机制研究      T1癌症免疫疗法肽类技术创新   1.206053\n",
       "43      S4癌症免疫疗法原理与应用研究  T7Toll样受体激动剂疫苗佐剂研发   1.205909\n",
       "44  S20幽门螺杆菌感染与胃部疾病机理研究       T0疫苗制备与组分创新技术   1.205533\n",
       "45      S0免疫响应与病毒感染机理研究       T3疫苗存储与注射技术创新   1.203945\n",
       "46     S5结核病疫苗研发与免疫响应研究        T6中药复方制剂技术创新   1.203935\n",
       "47       S16基因编辑原理与应用探索       T0疫苗制备与组分创新技术   1.202190\n",
       "48      S0免疫响应与病毒感染机理研究       T9神经退行性疾病疫苗研发   1.201491\n",
       "49      S1肝炎病毒复制与感染机制研究      T5登革热与寨卡病毒疫苗研发   1.200281\n",
       "50      S4癌症免疫疗法原理与应用研究        T2纳米疫苗递送系统研发   1.200248\n",
       "51       S2疱疹病毒免疫逃逸机制研究      T1癌症免疫疗法肽类技术创新   1.200048\n",
       "52      S4癌症免疫疗法原理与应用研究      T5登革热与寨卡病毒疫苗研发   1.200000"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "825713a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the weighted edge lists of networks 1 and 2 (row index omitted).\n",
    "df1_out = Path(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\2加权\\1引用网络边数据.xlsx')\n",
    "df2_out = Path(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\2加权\\2应用网络边数据.xlsx')\n",
    "# to_excel does not create missing folders, so make sure the output folder exists.\n",
    "df1_out.parent.mkdir(parents=True, exist_ok=True)\n",
    "df2_out.parent.mkdir(parents=True, exist_ok=True)\n",
    "df1.to_excel(df1_out, index=False)\n",
    "df2.to_excel(df2_out, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d77afc5b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "07b434f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Networks 3 and 4: load the normalized edge lists; their weights are multiplied by 0.8 in the next cell.\n",
    "df3 = pd.read_excel(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\1归一化\\3合作网络边数据新.xlsx')\n",
    "df4 = pd.read_excel(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\1归一化\\4转移网络边数据.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "298b35ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Down-weight networks 3 and 4 by a factor of 0.8 (vectorized column arithmetic).\n",
    "# NOTE: df3/df4 are overwritten in place, so running this cell twice applies the\n",
    "# factor twice; reload them from the workbooks before re-running.\n",
    "df3['weight'] = df3['weight'] * 0.8\n",
    "df4['weight'] = df4['weight'] * 0.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0c5dba94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the weighted edge lists of networks 3 and 4 (row index omitted).\n",
    "# The file name used for network 3 has no trailing '新', unlike the workbook it was read from.\n",
    "df3_out = Path(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\2加权\\3合作网络边数据.xlsx')\n",
    "df4_out = Path(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\2加权\\4转移网络边数据.xlsx')\n",
    "# to_excel does not create missing folders, so make sure the output folder exists.\n",
    "df3_out.parent.mkdir(parents=True, exist_ok=True)\n",
    "df4_out.parent.mkdir(parents=True, exist_ok=True)\n",
    "df3.to_excel(df3_out, index=False)\n",
    "df4.to_excel(df4_out, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "73c8f046",
   "metadata": {},
   "source": [
    "#### 3.总网络"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c3afadb5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory that holds the weighted edge-list workbooks.\n",
    "# NOTE(review): the stored listing of this folder includes workbooks 5-7, but no cell\n",
    "# in this notebook writes them here - presumably they were copied by hand; confirm.\n",
    "dir_path = Path(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\2加权')\n",
    "\n",
    "# Recursively collect every .xlsx workbook below dir_path.\n",
    "# - sorted(): rglob does not promise an order, so sort for reproducible runs.\n",
    "# - names starting with '~$' are skipped: Excel creates such lock files next to an\n",
    "#   open workbook; they end in .xlsx but pd.read_excel cannot read them.\n",
    "path_ls = sorted(\n",
    "    path\n",
    "    for path in dir_path.rglob('*')\n",
    "    if path.is_file() and path.suffix == '.xlsx' and not path.name.startswith('~$')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "c5f55f6b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/处理/2加权/1引用网络边数据.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/处理/2加权/2应用网络边数据.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/处理/2加权/3合作网络边数据.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/处理/2加权/4转移网络边数据.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/处理/2加权/5科学主题-科学主体边数据新.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/处理/2加权/6技术主题-技术主体边数据新.xlsx'),\n",
       " WindowsPath('F:/work/2024.8.5DK课题处理/7个网络20250106/处理/2加权/7产业主题-产业主体边数据.xlsx')]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path_ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c77664a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read every workbook listed in path_ls into its own DataFrame, keeping path_ls order.\n",
    "# A comprehension keeps the loop variables out of the notebook namespace.\n",
    "df_ls = [pd.read_excel(file) for file in path_ls]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "55b5c253",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack all per-network edge lists into one frame. ignore_index=True gives the result\n",
    "# a fresh 0..n-1 index instead of repeating each workbook's own row labels, so\n",
    "# label-based lookups on all_df are unambiguous.\n",
    "all_df = pd.concat(df_ls, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "8c05caca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>source</th>\n",
       "      <th>target</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>深圳康泰生物制品股份有限公司_product</td>\n",
       "      <td>I15灭活新冠疫苗</td>\n",
       "      <td>4.306123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>长春百克生物科技股份公司_product</td>\n",
       "      <td>I24疱疹疫苗</td>\n",
       "      <td>7.508936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>浙江普康生物技术股份有限公司_product</td>\n",
       "      <td>I13肝炎疫苗</td>\n",
       "      <td>3.772326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>浙江天元生物药业有限公司_product</td>\n",
       "      <td>I0甲型流感病毒疫苗</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>浙江天元生物药业有限公司_product</td>\n",
       "      <td>I2百白破等多联疫苗</td>\n",
       "      <td>1.231431</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>浙江天元生物药业有限公司_product</td>\n",
       "      <td>I11脑膜炎球菌多糖疫苗</td>\n",
       "      <td>3.357144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>中国生物技术股份有限公司_product</td>\n",
       "      <td>I0甲型流感病毒疫苗</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>中国生物技术股份有限公司_product</td>\n",
       "      <td>I8脊髓灰质炎疫苗</td>\n",
       "      <td>2.526779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>中国生物技术股份有限公司_product</td>\n",
       "      <td>I21水痘减毒活疫苗</td>\n",
       "      <td>5.017873</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>中逸安科生物技术股份有限公司_product</td>\n",
       "      <td>I3四价流感病毒疫苗</td>\n",
       "      <td>1.794134</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    source        target    weight\n",
       "49  深圳康泰生物制品股份有限公司_product     I15灭活新冠疫苗  4.306123\n",
       "50    长春百克生物科技股份公司_product       I24疱疹疫苗  7.508936\n",
       "51  浙江普康生物技术股份有限公司_product       I13肝炎疫苗  3.772326\n",
       "52    浙江天元生物药业有限公司_product    I0甲型流感病毒疫苗  1.000000\n",
       "53    浙江天元生物药业有限公司_product    I2百白破等多联疫苗  1.231431\n",
       "54    浙江天元生物药业有限公司_product  I11脑膜炎球菌多糖疫苗  3.357144\n",
       "55    中国生物技术股份有限公司_product    I0甲型流感病毒疫苗  1.000000\n",
       "56    中国生物技术股份有限公司_product     I8脊髓灰质炎疫苗  2.526779\n",
       "57    中国生物技术股份有限公司_product    I21水痘减毒活疫苗  5.017873\n",
       "58  中逸安科生物技术股份有限公司_product    I3四价流感病毒疫苗  1.794134"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df.tail(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "31c0ef56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the combined edge list (all_df) to a single workbook, omitting the row index.\n",
    "all_df.to_excel(r'F:\\work\\2024.8.5DK课题处理\\7个网络20250106\\处理\\3超网络边数据文件.xlsx',index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "030ec358",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f01a6710",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
